1 package org.apache.lucene.index;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 import java.io.IOException;
20 import java.util.ArrayList;
21 import java.util.Arrays;
22
23 import org.apache.lucene.analysis.MockAnalyzer;
24 import org.apache.lucene.document.Document;
25 import org.apache.lucene.document.Field;
26 import org.apache.lucene.document.FieldType;
27 import org.apache.lucene.document.TextField;
28 import org.apache.lucene.search.DocIdSetIterator;
29 import org.apache.lucene.store.Directory;
30 import org.apache.lucene.util.Bits;
31 import org.apache.lucene.util.BytesRef;
32 import org.apache.lucene.util.LuceneTestCase;
33 import org.apache.lucene.util.TestUtil;
34
35 public class TestDocsAndPositions extends LuceneTestCase {
36 private String fieldName;
37
38 @Override
39 public void setUp() throws Exception {
40 super.setUp();
41 fieldName = "field" + random().nextInt();
42 }
43
44
45
46
47 public void testPositionsSimple() throws IOException {
48 Directory directory = newDirectory();
49 RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
50 newIndexWriterConfig(new MockAnalyzer(random())));
51 for (int i = 0; i < 39; i++) {
52 Document doc = new Document();
53 FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
54 customType.setOmitNorms(true);
55 doc.add(newField(fieldName, "1 2 3 4 5 6 7 8 9 10 "
56 + "1 2 3 4 5 6 7 8 9 10 " + "1 2 3 4 5 6 7 8 9 10 "
57 + "1 2 3 4 5 6 7 8 9 10", customType));
58 writer.addDocument(doc);
59 }
60 IndexReader reader = writer.getReader();
61 writer.close();
62
63 int num = atLeast(13);
64 for (int i = 0; i < num; i++) {
65 BytesRef bytes = new BytesRef("1");
66 IndexReaderContext topReaderContext = reader.getContext();
67 for (LeafReaderContext leafReaderContext : topReaderContext.leaves()) {
68 PostingsEnum docsAndPosEnum = getDocsAndPositions(
69 leafReaderContext.reader(), bytes);
70 assertNotNull(docsAndPosEnum);
71 if (leafReaderContext.reader().maxDoc() == 0) {
72 continue;
73 }
74 final int advance = docsAndPosEnum.advance(random().nextInt(leafReaderContext.reader().maxDoc()));
75 do {
76 String msg = "Advanced to: " + advance + " current doc: "
77 + docsAndPosEnum.docID();
78 assertEquals(msg, 4, docsAndPosEnum.freq());
79 assertEquals(msg, 0, docsAndPosEnum.nextPosition());
80 assertEquals(msg, 4, docsAndPosEnum.freq());
81 assertEquals(msg, 10, docsAndPosEnum.nextPosition());
82 assertEquals(msg, 4, docsAndPosEnum.freq());
83 assertEquals(msg, 20, docsAndPosEnum.nextPosition());
84 assertEquals(msg, 4, docsAndPosEnum.freq());
85 assertEquals(msg, 30, docsAndPosEnum.nextPosition());
86 } while (docsAndPosEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
87 }
88 }
89 reader.close();
90 directory.close();
91 }
92
93 public PostingsEnum getDocsAndPositions(LeafReader reader,
94 BytesRef bytes) throws IOException {
95 Terms terms = reader.terms(fieldName);
96 if (terms != null) {
97 TermsEnum te = terms.iterator();
98 if (te.seekExact(bytes)) {
99 return te.postings(null, PostingsEnum.ALL);
100 }
101 }
102 return null;
103 }
104
105
106
107
108
109
110
111 public void testRandomPositions() throws IOException {
112 Directory dir = newDirectory();
113 RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
114 newIndexWriterConfig(new MockAnalyzer(random()))
115 .setMergePolicy(newLogMergePolicy()));
116 int numDocs = atLeast(47);
117 int max = 1051;
118 int term = random().nextInt(max);
119 Integer[][] positionsInDoc = new Integer[numDocs][];
120 FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
121 customType.setOmitNorms(true);
122 for (int i = 0; i < numDocs; i++) {
123 Document doc = new Document();
124 ArrayList<Integer> positions = new ArrayList<>();
125 StringBuilder builder = new StringBuilder();
126 int num = atLeast(131);
127 for (int j = 0; j < num; j++) {
128 int nextInt = random().nextInt(max);
129 builder.append(nextInt).append(" ");
130 if (nextInt == term) {
131 positions.add(Integer.valueOf(j));
132 }
133 }
134 if (positions.size() == 0) {
135 builder.append(term);
136 positions.add(num);
137 }
138 doc.add(newField(fieldName, builder.toString(), customType));
139 positionsInDoc[i] = positions.toArray(new Integer[0]);
140 writer.addDocument(doc);
141 }
142
143 IndexReader reader = writer.getReader();
144 writer.close();
145
146 int num = atLeast(13);
147 for (int i = 0; i < num; i++) {
148 BytesRef bytes = new BytesRef("" + term);
149 IndexReaderContext topReaderContext = reader.getContext();
150 for (LeafReaderContext leafReaderContext : topReaderContext.leaves()) {
151 PostingsEnum docsAndPosEnum = getDocsAndPositions(
152 leafReaderContext.reader(), bytes);
153 assertNotNull(docsAndPosEnum);
154 int initDoc = 0;
155 int maxDoc = leafReaderContext.reader().maxDoc();
156
157 if (random().nextBoolean()) {
158 initDoc = docsAndPosEnum.nextDoc();
159 } else {
160 initDoc = docsAndPosEnum.advance(random().nextInt(maxDoc));
161 }
162
163 do {
164 int docID = docsAndPosEnum.docID();
165 if (docID == DocIdSetIterator.NO_MORE_DOCS) {
166 break;
167 }
168 Integer[] pos = positionsInDoc[leafReaderContext.docBase + docID];
169 assertEquals(pos.length, docsAndPosEnum.freq());
170
171
172 final int howMany = random().nextInt(20) == 0 ? pos.length
173 - random().nextInt(pos.length) : pos.length;
174 for (int j = 0; j < howMany; j++) {
175 assertEquals("iteration: " + i + " initDoc: " + initDoc + " doc: "
176 + docID + " base: " + leafReaderContext.docBase
177 + " positions: " + Arrays.toString(pos)
178 , pos[j].intValue(), docsAndPosEnum.nextPosition());
179 }
180
181 if (random().nextInt(10) == 0) {
182 if (docsAndPosEnum.advance(docID + 1 + random().nextInt((maxDoc - docID))) == DocIdSetIterator.NO_MORE_DOCS) {
183 break;
184 }
185 }
186
187 } while (docsAndPosEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
188 }
189
190 }
191 reader.close();
192 dir.close();
193 }
194
195 public void testRandomDocs() throws IOException {
196 Directory dir = newDirectory();
197 RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
198 newIndexWriterConfig(new MockAnalyzer(random()))
199 .setMergePolicy(newLogMergePolicy()));
200 int numDocs = atLeast(49);
201 int max = 15678;
202 int term = random().nextInt(max);
203 int[] freqInDoc = new int[numDocs];
204 FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
205 customType.setOmitNorms(true);
206 for (int i = 0; i < numDocs; i++) {
207 Document doc = new Document();
208 StringBuilder builder = new StringBuilder();
209 for (int j = 0; j < 199; j++) {
210 int nextInt = random().nextInt(max);
211 builder.append(nextInt).append(' ');
212 if (nextInt == term) {
213 freqInDoc[i]++;
214 }
215 }
216 doc.add(newField(fieldName, builder.toString(), customType));
217 writer.addDocument(doc);
218 }
219
220 IndexReader reader = writer.getReader();
221 writer.close();
222
223 int num = atLeast(13);
224 for (int i = 0; i < num; i++) {
225 BytesRef bytes = new BytesRef("" + term);
226 IndexReaderContext topReaderContext = reader.getContext();
227 for (LeafReaderContext context : topReaderContext.leaves()) {
228 int maxDoc = context.reader().maxDoc();
229 PostingsEnum postingsEnum = TestUtil.docs(random(), context.reader(), fieldName, bytes, null, PostingsEnum.FREQS);
230 if (findNext(freqInDoc, context.docBase, context.docBase + maxDoc) == Integer.MAX_VALUE) {
231 assertNull(postingsEnum);
232 continue;
233 }
234 assertNotNull(postingsEnum);
235 postingsEnum.nextDoc();
236 for (int j = 0; j < maxDoc; j++) {
237 if (freqInDoc[context.docBase + j] != 0) {
238 assertEquals(j, postingsEnum.docID());
239 assertEquals(postingsEnum.freq(), freqInDoc[context.docBase +j]);
240 if (i % 2 == 0 && random().nextInt(10) == 0) {
241 int next = findNext(freqInDoc, context.docBase+j+1, context.docBase + maxDoc) - context.docBase;
242 int advancedTo = postingsEnum.advance(next);
243 if (next >= maxDoc) {
244 assertEquals(DocIdSetIterator.NO_MORE_DOCS, advancedTo);
245 } else {
246 assertTrue("advanced to: " +advancedTo + " but should be <= " + next, next >= advancedTo);
247 }
248 } else {
249 postingsEnum.nextDoc();
250 }
251 }
252 }
253 assertEquals("docBase: " + context.docBase + " maxDoc: " + maxDoc + " " + postingsEnum.getClass(), DocIdSetIterator.NO_MORE_DOCS, postingsEnum.docID());
254 }
255
256 }
257
258 reader.close();
259 dir.close();
260 }
261
262 private static int findNext(int[] docs, int pos, int max) {
263 for (int i = pos; i < max; i++) {
264 if( docs[i] != 0) {
265 return i;
266 }
267 }
268 return Integer.MAX_VALUE;
269 }
270
271
272
273
274
275 public void testLargeNumberOfPositions() throws IOException {
276 Directory dir = newDirectory();
277 RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
278 newIndexWriterConfig(new MockAnalyzer(random())));
279 int howMany = 1000;
280 FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
281 customType.setOmitNorms(true);
282 for (int i = 0; i < 39; i++) {
283 Document doc = new Document();
284 StringBuilder builder = new StringBuilder();
285 for (int j = 0; j < howMany; j++) {
286 if (j % 2 == 0) {
287 builder.append("even ");
288 } else {
289 builder.append("odd ");
290 }
291 }
292 doc.add(newField(fieldName, builder.toString(), customType));
293 writer.addDocument(doc);
294 }
295
296
297 IndexReader reader = writer.getReader();
298 writer.close();
299
300 int num = atLeast(13);
301 for (int i = 0; i < num; i++) {
302 BytesRef bytes = new BytesRef("even");
303
304 IndexReaderContext topReaderContext = reader.getContext();
305 for (LeafReaderContext leafReaderContext : topReaderContext.leaves()) {
306 PostingsEnum docsAndPosEnum = getDocsAndPositions(
307 leafReaderContext.reader(), bytes);
308 assertNotNull(docsAndPosEnum);
309
310 int initDoc = 0;
311 int maxDoc = leafReaderContext.reader().maxDoc();
312
313 if (random().nextBoolean()) {
314 initDoc = docsAndPosEnum.nextDoc();
315 } else {
316 initDoc = docsAndPosEnum.advance(random().nextInt(maxDoc));
317 }
318 String msg = "Iteration: " + i + " initDoc: " + initDoc;
319 assertEquals(howMany / 2, docsAndPosEnum.freq());
320 for (int j = 0; j < howMany; j += 2) {
321 assertEquals("position missmatch index: " + j + " with freq: "
322 + docsAndPosEnum.freq() + " -- " + msg, j,
323 docsAndPosEnum.nextPosition());
324 }
325 }
326 }
327 reader.close();
328 dir.close();
329 }
330
331 public void testDocsEnumStart() throws Exception {
332 Directory dir = newDirectory();
333 RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
334 Document doc = new Document();
335 doc.add(newStringField("foo", "bar", Field.Store.NO));
336 writer.addDocument(doc);
337 DirectoryReader reader = writer.getReader();
338 LeafReader r = getOnlySegmentReader(reader);
339 PostingsEnum disi = TestUtil.docs(random(), r, "foo", new BytesRef("bar"), null, PostingsEnum.NONE);
340 int docid = disi.docID();
341 assertEquals(-1, docid);
342 assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
343
344
345 TermsEnum te = r.terms("foo").iterator();
346 assertTrue(te.seekExact(new BytesRef("bar")));
347 disi = TestUtil.docs(random(), te, disi, PostingsEnum.NONE);
348 docid = disi.docID();
349 assertEquals(-1, docid);
350 assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
351 writer.close();
352 r.close();
353 dir.close();
354 }
355
356 public void testDocsAndPositionsEnumStart() throws Exception {
357 Directory dir = newDirectory();
358 RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
359 Document doc = new Document();
360 doc.add(newTextField("foo", "bar", Field.Store.NO));
361 writer.addDocument(doc);
362 DirectoryReader reader = writer.getReader();
363 LeafReader r = getOnlySegmentReader(reader);
364 PostingsEnum disi = r.postings(new Term("foo", "bar"), PostingsEnum.ALL);
365 int docid = disi.docID();
366 assertEquals(-1, docid);
367 assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
368
369
370 TermsEnum te = r.terms("foo").iterator();
371 assertTrue(te.seekExact(new BytesRef("bar")));
372 disi = te.postings(disi, PostingsEnum.ALL);
373 docid = disi.docID();
374 assertEquals(-1, docid);
375 assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
376 writer.close();
377 r.close();
378 dir.close();
379 }
380 }